using System;
using System.IO;
using System.Runtime.InteropServices;
using DynaPDF;

namespace text_extraction
{
	/// <summary>
	/// Extracts the text records of PDF pages through a DynaPDF <c>CPDF</c> instance and
	/// writes them as UTF-16 (little endian) text to an output file.
	/// </summary>
	/// <remarks>
	/// <see cref="Open"/> must be called before <see cref="ParsePage"/> or
	/// <see cref="WritePageIdentifier"/> because both write to the output file.
	/// </remarks>
	public class CTextExtraction
	{
      // Tolerance used by IsPointOnLine().
      // This must be the square of the allowed error (2 * 2 in this case).
      private const double MAX_LINE_ERROR = 4.0;

      /// <summary>Creates an extractor that reads its text records from <paramref name="PDFInst"/>.</summary>
      /// <param name="PDFInst">The PDF instance whose pages and templates are parsed.</param>
      internal CTextExtraction(CPDF PDFInst)
      {
         m_LastTextDir = TTextDir.tfNotInitialized;
         m_PDF         = PDFInst;
      }

      /// <summary>
      /// Writes the text record currently stored in <c>m_Stack</c> to the output file. A line
      /// break is written first when the record has another direction than the previous one or
      /// does not lie on the previous text line; otherwise a space is written first when the gap
      /// to the previous record exceeds the space width.
      /// </summary>
      /// <returns>Always 0.</returns>
      private int AddText()
      {
         TTextDir textDir;
         double x1 = 0.0;
         double y1 = 0.0;
         double x2 = 0.0;
         double y2 = m_Stack.FontSize;
         // Transform the text matrix to user space
         TCTM m = MulMatrix(m_Stack.ctm, m_Stack.tm);
         // Start point of the text record
         Transform(m, ref x1, ref y1);
         // The second point to determine the text direction can also be used to calculate
         // the visible font size measured in user space:
         //   double realFontSize = CalcDistance(x1, y1, x2, y2);
         Transform(m, ref x2, ref y2);

         // Determine the text direction from the vector (x1, y1) -> (x2, y2), i.e. the transformed
         // "up" vector of the text. The exact comparison of y1 and y2 is intentional: only a
         // perfectly horizontal vector is treated as vertical text.
         if (y1 == y2)
            textDir = (x1 > x2) ? TTextDir.tfBottomToTop : TTextDir.tfTopToBottom;
         else
            textDir = (y1 > y2) ? TTextDir.tfRightToLeft : TTextDir.tfLeftToRight;

         // Wrong direction or not on the same text line?
         if (textDir != m_LastTextDir || !IsPointOnLine(x1, y1, m_LastTextEndX, m_LastTextEndY, m_LastTextInfX, m_LastTextInfY))
         {
            // Extend the x-coordinate to an infinite point.
            m_LastTextInfX = 1000000.0;
            m_LastTextInfY = 0.0;
            Transform(m, ref m_LastTextInfX, ref m_LastTextInfY);
            if (m_LastTextDir != TTextDir.tfNotInitialized)
            {
               // Add a new line to the output file
               WriteString("\r\n");
            }
         }
         else
         {
            /*
               The space width is measured in text space but the distance between two text
               records is measured in user space! We must transform the space width to user
               space before we can compare the values.
            */
            // Note that we use the full space width here because the end position of the last record
            // was set to the record width minus the half space width.
            double x3 = m_Stack.SpaceWidth;
            double y3 = 0.0;
            Transform(m, ref x3, ref y3);
            double spaceWidth = CalcDistance(x1, y1, x3, y3);
            double distance   = CalcDistance(m_LastTextEndX, m_LastTextEndY, x1, y1);
            if (distance > spaceWidth)
            {
               // Add a space to the output file
               WriteString(" ");
            }
         }

         TTextRecordW rec = new TTextRecordW();
         // The record size does not change while iterating, so it is determined only once.
         int   recSize = Marshal.SizeOf(rec);
         long  ptr     = (long)m_Stack.Kerning;
         float spw     = -m_Stack.SpaceWidth * 0.5f;
         if (m_Stack.FontSize < 0.0f)
            spw = -spw;
         for (int i = 0; i < m_Stack.KerningCount; i++)
         {
            CPDF.CopyKernRecord(new IntPtr(ptr), ref rec, recSize);
            if (rec.Advance < spw)
            {
               // Add a space to the output file
               WriteString(" ");
            }
            ptr += recSize;
            WriteString(Marshal.PtrToStringUni(rec.Text, rec.Length));
         }
         // We don't set the cursor to the real end of the string because applications like MS Word
         // often add a space to the end of a text record and this space can slightly overlap the next
         // record. IsPointOnLine() would return false if the new record overlaps the previous one.
         m_LastTextEndX = m_Stack.TextWidth + spw; // spw is negative unless the font size is negative
         m_LastTextEndY = 0.0;
         m_LastTextDir  = textDir;
         // Calculate the end coordinate of the text record
         Transform(m, ref m_LastTextEndX, ref m_LastTextEndY);
         return 0;
      }

      /// <summary>Returns the Euclidean distance between the points (x1, y1) and (x2, y2).</summary>
      private double CalcDistance(double x1, double y1, double x2, double y2)
      {
         double dx = x2 - x1;
         double dy = y2 - y1;
         return Math.Sqrt(dx * dx + dy * dy);
      }

      /// <summary>
      /// Flushes and closes the output file. Calling this method when no file is open
      /// (never opened, or already closed) is a no-op.
      /// </summary>
      public void Close()
      {
         if (m_File != null)
         {
            m_File.Flush();
            // The writer was created on top of m_Stream; closing the writer closes the stream as well.
            m_File.Close();
         }
         m_File   = null;
         m_Stream = null;
      }

      /// <summary>
      /// Tests whether the point (x, y) lies within the allowed error of the line segment
      /// from (x0, y0) to (x1, y1).
      /// </summary>
      /// <returns>
      /// True if the squared distance between the point and the nearest point of the segment is
      /// smaller than <c>MAX_LINE_ERROR</c>; false otherwise, and always false for a zero-length segment.
      /// </returns>
      private bool IsPointOnLine(double x, double y, double x0, double y0, double x1, double y1)
      {
         double dx, dy, di;
         x -= x0;
         y -= y0;
         dx = x1 - x0;
         dy = y1 - y0;
         // A zero-length segment would lead to a 0/0 division below. The resulting NaN makes
         // every comparison false, so the outcome is "not on the line"; state that explicitly.
         if (dx == 0.0 && dy == 0.0) return false;
         // Projection parameter of the point onto the segment, clamped to [0, 1]
         di = (x * dx + y * dy) / (dx * dx + dy * dy);
         di = (di < 0.0) ? 0.0 : (di > 1.0) ? 1.0 : di;
         // Vector from the nearest point of the segment to the tested point
         dx = x - di * dx;
         dy = y - di * dy;
         di = dx * dx + dy * dy;
         return (di < MAX_LINE_ERROR);
      }

      /// <summary>
      /// Multiplies two transformation matrices. Transforming a point with the result is
      /// equivalent to transforming it with <paramref name="M2"/> first and with
      /// <paramref name="M1"/> afterwards (see <c>Transform</c>).
      /// </summary>
      private TCTM MulMatrix(TCTM M1, TCTM M2)
      {
         TCTM retval;
         retval.a = M2.a * M1.a + M2.b * M1.c;
         retval.b = M2.a * M1.b + M2.b * M1.d;
         retval.c = M2.c * M1.a + M2.d * M1.c;
         retval.d = M2.c * M1.b + M2.d * M1.d;
         retval.x = M2.x * M1.a + M2.y * M1.c + M1.x;
         retval.y = M2.x * M1.b + M2.y * M1.d + M1.y;
         return retval;
      }

      /// <summary>
      /// Creates (or overwrites) the output file and writes the UTF-16 byte order mark.
      /// An output file that is still open from a previous call is closed first.
      /// </summary>
      /// <param name="FileName">Path of the output file.</param>
      public void Open(String FileName)
      {
         // Release a previously opened file so that its handle does not leak.
         if (m_File != null) Close();

         m_Stream = new FileStream(FileName, FileMode.Create, FileAccess.ReadWrite);
         m_File   = new BinaryWriter(m_Stream, System.Text.Encoding.Unicode);
         // Write a Little Endian marker to the file (byte order mark)
         WriteString("\uFEFF");
      }

      /// <summary>
      /// Resets the line tracking state and writes the text of the page that is currently open
      /// in the PDF instance, followed by the text of its templates. Nothing is written when
      /// <c>InitStack</c> fails.
      /// </summary>
      public void ParsePage()
      {
         if (!m_PDF.InitStack(ref m_Stack)) return;
         m_LastTextEndX = 0.0;
         m_LastTextEndY = 0.0;
         m_LastTextDir  = TTextDir.tfNotInitialized;
         m_LastTextInfX = 0.0;
         m_LastTextInfY = 0.0;

         ParseText();
         ParseTemplates();
      }

      /// <summary>
      /// Writes the text of every template reported by <c>GetTemplCount</c>, descending
      /// recursively into nested templates. Processing stops at the first template that
      /// cannot be opened or whose stack cannot be initialized.
      /// </summary>
      private void ParseTemplates()
      {
         int tmplCount = m_PDF.GetTemplCount();
         for (int i = 0; i < tmplCount; i++)
         {
            if (!m_PDF.EditTemplate(i)) return;
            if (!m_PDF.InitStack(ref m_Stack))
            {
               // Close the template opened above so that EditTemplate/EndTemplate stay balanced.
               m_PDF.EndTemplate();
               return;
            }

            ParseText();

            // ParseTemplates() itself loops over all indices reported by GetTemplCount(), so a
            // single recursive call visits every nested template once. Calling it once per
            // nested template would process each of them repeatedly and duplicate their text.
            ParseTemplates();

            m_PDF.EndTemplate();
         }
      }

      /// <summary>
      /// Fetches text records with <c>GetPageText</c> and passes each of them to <c>AddText</c>.
      /// </summary>
      private void ParseText()
      {
         // Get the first text record if any
         bool haveMore = m_PDF.GetPageText(ref m_Stack);
         // No text found?
         if (!haveMore && m_Stack.TextLen == 0) return;
         AddText();
         if (!haveMore) return;

         // NOTE(review): a record delivered together with a false return value is handled for
         // the first call only; confirm against the GetPageText documentation that later calls
         // never return false with TextLen != 0.
         while (m_PDF.GetPageText(ref m_Stack))
         {
            AddText();
         }
      }

      /// <summary>
      /// Transforms the point (x, y) in place with the matrix <paramref name="M"/>:
      /// x' = x*a + y*c + M.x and y' = x*b + y*d + M.y.
      /// </summary>
      private void Transform(TCTM M, ref double x, ref double y)
      {
         // Keep the original x because it is still needed after x has been overwritten.
         double tx = x;
         x = tx * M.a + y * M.c + M.x;
         y = tx * M.b + y * M.d + M.y;
      }

      /// <summary>
      /// Writes a separator line containing the page number to the output file. For every
      /// page after the first one the separator is preceded by an additional line break.
      /// </summary>
      /// <param name="PageNum">The page number shown in the separator line.</param>
      public void WritePageIdentifier(int PageNum)
      {
         if (PageNum > 1)
         {
            WriteString("\r\n");
         }
         WriteString(String.Format("%----------------------- Page {0} -----------------------------\r\n", PageNum));
      }

      /// <summary>
      /// Writes <paramref name="text"/> to the output file as raw UTF-16 (little endian) bytes,
      /// without the length prefix that <c>BinaryWriter.Write(string)</c> would add.
      /// </summary>
      private void WriteString(string text)
      {
         m_File.Write(System.Text.UnicodeEncoding.Unicode.GetBytes(text));
      }

      /// <summary>Direction of a text record as determined by <c>AddText</c>.</summary>
      protected enum TTextDir
      {
         tfLeftToRight    = 0,
         tfRightToLeft    = 1,
         tfTopToBottom    = 2,
         tfBottomToTop    = 4,
         tfNotInitialized = -1 // No text record was written since the last reset
      }
      protected BinaryWriter m_File;       // Writer of the output file, null while no file is open
      protected bool       m_HavePos;      // Not referenced by this class
      protected TTextDir   m_LastTextDir;  // Direction of the previously written text record
      protected double     m_LastTextEndX; // End point of the previous record (user space)
      protected double     m_LastTextEndY;
      protected double     m_LastTextInfX; // Far point on the current text line (user space)
      protected double     m_LastTextInfY;
      internal  CPDF       m_PDF;          // PDF instance the text is read from
      protected TPDFStack  m_Stack;        // Receives the current text record
      protected FileStream m_Stream;       // Stream of the output file, null while no file is open
	}
}
